#!/usr/bin/env bash
#
# Start `vllm serve` for the local Qwen2.5-VL-32B-Instruct weights, exposed
# under the model name "qwenvl" on 0.0.0.0:8000.
#
# Environment exported for the server process:
#   VLLM_ENGINE_ITERATION_TIMEOUT_S  engine iteration timeout; the `_S` suffix
#                                    indicates seconds (36000 s = 10 h).
#   VLLM_RPC_TIMEOUT                 RPC timeout; presumably milliseconds
#                                    (36000000 ms = 10 h) -- confirm against the
#                                    installed vLLM version.
#   VLLM_ENFORCE_CUDA_GRAPH          NOTE(review): could not confirm that vLLM
#                                    reads this variable; verify it has an
#                                    effect on the installed build.

export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000
export VLLM_ENFORCE_CUDA_GRAPH=1

# 32K-token context window. Computed with POSIX `$(( ))` arithmetic instead of
# the obsolete `$[ ]` form, which Bash keeps only for backward compatibility.
max_model_len=$((32 * 1024))

# `-tp 4` requests tensor parallelism across 4 devices.
# `--host 0.0.0.0` binds every network interface, so the server is reachable
# from other machines -- keep it behind a firewall if that is not intended.
vllm serve "/home/weights/Qwen2.5-VL-32B-Instruct" \
  --served_model_name "qwenvl" \
  -tp 4 \
  --max-model-len "${max_model_len}" \
  --disable_log_stats \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 8000

